
The main idea was to keep the model as simple as possible, without complicated pre-processing or post-processing. To train the model, I used only a single image of a ball, which I located with a simple OpenCV function (Hough circle detection). I then pasted that same ball onto background photos taken from Kaggle. I split the balls into two classes: the magic ball and a plain fixed-color circle/ball. There are many other ways to approach this task, but I chose this one because, in my understanding, it is the fastest (as you can see, it takes an average of about 70 ms per image) and requires minimal time for pre- and post-processing.
from IPython.display import clear_output
import torch
import cv2
import numpy as np
import albumentations as A
import cv2
from matplotlib import pyplot as plt
from PIL import Image, ImageDraw
import sys
import glob
from tqdm import tqdm
from ultralytics import YOLO
import time
from ensemble_boxes import *
import os
class config:
    """Constants shared by the data-generation cells of this notebook."""

    # Number of synthetic training images to generate.
    MAX_IMAGES = 10000
    # Reference photo from which the magic ball is located and cropped.
    magic_ball_for_example = "Balls/MVC-001F.JPG"
    # Background image paths; collected once, when the class body executes.
    random_paths = glob.glob("stanford-background-dataset/images/*.jpg")
def find_ball_with_opencv(path):
    """Locate a ball in an image file using the Hough circle transform.

    Args:
        path: Path of the image file to read.

    Returns:
        A tuple ``(img_rgb, bbox)``.

        * ``img_rgb`` is the colour image as loaded by ``cv2.imread`` (so,
          despite the name, in OpenCV's channel order) with every detected
          circle outlined and its centre marked.
        * ``bbox`` is ``[x_min, y_min, x_max, y_max]`` as plain ints, clipped
          to the image, for the last circle in the detection result.

    Raises:
        FileNotFoundError: If the image cannot be read.
        ValueError: If no circle is detected.
    """
    img_rgb = cv2.imread(path)
    # Second read kept on purpose: the detector runs on the decoder's own
    # grayscale conversion, exactly as before.
    img = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
    if img_rgb is None or img is None:
        raise FileNotFoundError(f"could not read image: {path}")

    img_b = cv2.medianBlur(img, 5)
    circles = cv2.HoughCircles(
        img_b,
        cv2.HOUGH_GRADIENT,
        1,
        300,
        param1=100,
        param2=30,
        minRadius=0,
        maxRadius=100,
    )
    # HoughCircles yields None when nothing is found; fail with a clear message
    # instead of crashing inside np.around.
    if circles is None or len(circles[0]) == 0:
        raise ValueError(f"no circle detected in image: {path}")

    # Signed integers: with uint16 the expression centre - radius wrapped
    # around to a huge value whenever the circle crossed the left/top edge.
    circles = np.around(circles[0]).astype(int)
    height, width = img_rgb.shape[:2]

    bbox = None
    for c in circles:
        cx, cy, radius = int(c[0]), int(c[1]), int(c[2])
        cv2.circle(img_rgb, (cx, cy), radius, (0, 255, 0), 7)  # outline
        cv2.circle(img_rgb, (cx, cy), 2, (0, 0, 255), 7)  # centre dot
        bbox = [
            max(cx - radius, 0),
            max(cy - radius, 0),
            min(cx + radius, width),
            min(cy + radius, height),
        ]
    return img_rgb, bbox
# Run the detector on the reference photo and preview the annotated result.
img_rgb, bbox = find_ball_with_opencv(config.magic_ball_for_example)
# The channel axis is reversed before handing the image to matplotlib.
plt.imshow(img_rgb[..., ::-1])
plt.show()
# Augmentation pipeline applied to the ball crop before it is pasted onto a
# background: one always-on geometric step (p=1.0) followed by two degradation
# steps that each use p=0.75.
transform = A.Compose(
    [
        A.ShiftScaleRotate(
            shift_limit=0.1,
            scale_limit=0.25,
            rotate_limit=360,
            p=1.0,
        ),
        A.Downscale(p=0.75),
        A.GaussianBlur(p=0.75),
    ]
)
def gen_random_magic_ball(color_image, transform, min_imgz=64):
    """Create one randomly augmented, elliptically masked ball patch.

    The crop is passed through ``transform`` and every pixel outside the
    ellipse inscribed in the crop is set to zero. With probability 1/2 the
    augmented texture is kept (class 0); otherwise the ellipse is filled with
    a single flat colour (class 1): a random colour (3/4 of the time), a dark
    grey (3/16) or a pure second-channel colour (1/16).

    Args:
        color_image: Ball crop as a three-channel array accepted by
            ``transform`` and ``Image.fromarray``.
        transform: Callable invoked as ``transform(image=...)`` that returns a
            mapping with an ``"image"`` entry.
        min_imgz: Smallest side length of the output patch in pixels. The
            actual side is drawn from ``[min_imgz, 2 * min_imgz - 1]``.

    Returns:
        A tuple ``(patch, c)``: a square ``uint8`` image of the drawn side
        length, and the class label ``c`` (0 or 1).

    Raises:
        ValueError: If ``min_imgz`` is smaller than 1.
    """
    if min_imgz < 1:
        raise ValueError(f"min_imgz must be at least 1, got {min_imgz}")

    augmented = transform(image=color_image)["image"]
    img = Image.fromarray(augmented)
    npImage = np.array(img)

    # PIL reports size as (width, height); the box passed to pieslice is
    # [x0, y0, x1, y1].
    width, height = img.size
    alpha = Image.new('L', img.size, 0)
    ImageDraw.Draw(alpha).pieslice([0, 0, width, height], 0, 360, fill=255)
    inside = np.expand_dims(np.array(alpha).astype(bool).astype(float), -1)

    if np.random.randint(2) == 0:
        # Keep the augmented texture of the real ball.
        npImage = npImage.astype(float) * inside
        c = 0
    else:
        npImage = np.ones_like(npImage.astype(float)) * inside
        # All factors below start at 1: a factor of 0 produced an all-black
        # patch, which is indistinguishable from the zeroed "outside" area and
        # therefore could not be pasted visibly.
        if np.random.randint(4) != 0:
            for channel in range(3):
                npImage[:, :, channel] *= np.random.randint(1, 256)
        elif np.random.randint(4) != 0:
            npImage *= np.random.randint(1, 64)
        else:
            npImage[:, :, 1] *= np.random.randint(1, 256)
            npImage[:, :, 0] *= 0
            npImage[:, :, 2] *= 0
        c = 1

    size = min_imgz + np.random.randint(min_imgz)
    return cv2.resize(npImage.astype(np.uint8), (size, size)), c
def get_random_background(random_paths=config.random_paths):
    """Load one randomly chosen background image.

    Args:
        random_paths: Sequence of image file paths to choose from. The default
            is the list held by ``config.random_paths`` at the moment this
            function is defined.

    Returns:
        The chosen image as returned by ``cv2.imread``.

    Raises:
        ValueError: If ``random_paths`` is empty.
        FileNotFoundError: If the chosen file cannot be read as an image.
    """
    if len(random_paths) == 0:
        raise ValueError("random_paths is empty; no background images to choose from")
    chosen = random_paths[np.random.randint(len(random_paths))]
    image = cv2.imread(chosen)
    # cv2.imread signals failure by returning None; surface it here rather
    # than letting a later slice/shape access fail far from the cause.
    if image is None:
        raise FileNotFoundError(f"could not read background image: {chosen}")
    return image
def copy_paste(back_image, img):
    """Paste one randomly generated ball onto a background image.

    A patch is produced by ``gen_random_magic_ball`` (its minimum side is one
    tenth of the shorter background side) and placed at a random position that
    keeps it fully inside the background. Zero-valued patch pixels are treated
    as transparent.

    Args:
        back_image: Three-channel background image (its shape is unpacked as
            height, width, channels).
        img: Ball crop forwarded to ``gen_random_magic_ball``.

    Returns:
        A tuple ``(composite, box)`` where ``composite`` is a new image with
        the ball pasted in and ``box`` is
        ``[x_min, y_min, x_max, y_max, class_id]`` in pixel coordinates.
    """
    h, w, _ = back_image.shape
    img_gen, c = gen_random_magic_ball(img, transform, min_imgz=min(h, w) // 10)
    patch_h, patch_w = img_gen.shape[:2]

    startY = np.random.randint(h - patch_h)
    startX = np.random.randint(w - patch_w)

    new_image = np.zeros_like(back_image)
    new_image[startY:startY + patch_h, startX:startX + patch_w] = img_gen

    # A pixel belongs to the ball when ANY channel is non-zero. Testing only
    # channel 0 dropped every ball whose first channel is zero (the green-only
    # ball in particular) while its box was still returned as a label.
    ball_mask = new_image.any(axis=2)
    composite = np.where(ball_mask[:, :, None], new_image, back_image)
    return composite, [startX, startY, startX + patch_w, startY + patch_h, c]
# Reload the reference photo, reverse its channel axis and keep only the
# region inside the detected bounding box; then preview the crop.
left, top, right, bottom = bbox
img_ball = cv2.imread(config.magic_ball_for_example)[top:bottom, left:right, ::-1]
plt.imshow(img_ball)
<matplotlib.image.AxesImage at 0x7f662c1762d0>
# Preview: a 3x3 grid of synthetic samples, each with 1-10 pasted balls and
# the corresponding boxes drawn on top.
plt.figure(figsize=[20, 20])
for row in range(9):
    plt.subplot(3, 3, row + 1)
    bboxes = []
    results = get_random_background(config.random_paths)[:, :, ::-1]
    n_balls = 1 + np.random.randint(10)
    for _ in range(n_balls):
        results, bbox2 = copy_paste(results, img_ball)
        bboxes.append(bbox2)
        # Scalar colour value: 255 for class 0, 0 for class 1.
        c = 255 if bbox2[-1] == 0 else 0
        top_left = (bbox2[0], bbox2[1])
        bottom_right = (bbox2[2], bbox2[3])
        cv2.rectangle(results, top_left, bottom_right, c, 4)
    plt.imshow(results)
# Generate the synthetic training set: for every image, paste 1-5 balls onto a
# random background, save the picture as train/<n>.jpg and its labels as
# train/<n>.txt (one "class x_center y_center width height" line per ball,
# coordinates normalised by the image size).

# The output directory has to exist: cv2.imwrite only reports failure through
# its return value and open() would raise.
os.makedirs("train", exist_ok=True)
for num_image in tqdm(range(config.MAX_IMAGES)):
    bboxes = []
    # `random_paths` is not defined at module level; the list lives on config.
    results = get_random_background(config.random_paths)[:, :, ::-1]
    for _ in range(1 + np.random.randint(5)):
        results, bbox2 = copy_paste(results, img_ball)
        bboxes.append(bbox2)
    # The channel axis is reversed back before writing with OpenCV.
    cv2.imwrite(f"train/{num_image}.jpg", results[:, :, ::-1])
    h, w, _ = results.shape
    with open(f'train/{num_image}.txt', 'w') as f:
        for x_min, y_min, x_max, y_max, class_num in bboxes:
            norm_w = float(x_max - x_min) / w
            norm_h = float(y_max - y_min) / h
            norm_x_center = float(x_min) / w + norm_w / 2
            norm_y_center = float(y_min) / h + norm_h / 2
            # The class index is written as an integer rather than "0.0"/"1.0".
            f.write(
                f"{int(class_num)} {norm_x_center} {norm_y_center} {norm_w} {norm_h}\n"
            )
# Write the image lists consumed by training: the last 200 generated images
# are held out for validation, everything before them is used for training.
# Sorting makes the split reproducible (glob returns files in arbitrary order)
# and the split point is derived from the files actually found, so it stays
# correct when the directory does not hold exactly config.MAX_IMAGES images.
all_data = sorted(glob.glob("train/*.jpg"))
split_at = max(len(all_data) - 200, 0)
with open('train.txt', 'w') as f:
    f.writelines(f"{row}\n" for row in all_data[:split_at])
with open('val.txt', 'w') as f:
    f.writelines(f"{row}\n" for row in all_data[split_at:])

from ultralytics import YOLO

# Start from the yolov8n.pt checkpoint and train on the dataset described by
# ball.yaml; the run is stored under the name "magic_ball".
model = YOLO("yolov8n.pt")
results = model.train(
    data='ball.yaml',
    epochs=6,
    imgsz=640,
    save=True,
    exist_ok=True,
    plots=True,
    val=True,
    mixup=0.75,
    batch=32,
    close_mosaic=0,
    name="magic_ball",
)
Ultralytics YOLOv8.0.145 🚀 Python-3.7.10 torch-1.8.1+cu111 CUDA:0 (NVIDIA RTX 6000 Ada Generation, 48640MiB) WARNING ⚠️ Upgrade to torch>=2.0.0 for deterministic training. engine/trainer: task=detect, mode=train, model=yolov8n.pt, data=ball.yaml, epochs=6, patience=50, batch=32, imgsz=640, save=True, save_period=-1, cache=False, device=None, workers=8, project=None, name=magic_ball, exist_ok=True, pretrained=True, optimizer=auto, verbose=True, seed=0, deterministic=True, single_cls=False, rect=False, cos_lr=False, close_mosaic=0, resume=False, amp=True, fraction=1.0, profile=False, overlap_mask=True, mask_ratio=4, dropout=0.0, val=True, split=val, save_json=False, save_hybrid=False, conf=None, iou=0.7, max_det=300, half=False, dnn=False, plots=True, source=None, show=False, save_txt=False, save_conf=False, save_crop=False, show_labels=True, show_conf=True, vid_stride=1, line_width=None, visualize=False, augment=False, agnostic_nms=False, classes=None, retina_masks=False, boxes=True, format=torchscript, keras=False, optimize=False, int8=False, dynamic=False, simplify=False, opset=None, workspace=4, nms=False, lr0=0.01, lrf=0.01, momentum=0.937, weight_decay=0.0005, warmup_epochs=3.0, warmup_momentum=0.8, warmup_bias_lr=0.1, box=7.5, cls=0.5, dfl=1.5, pose=12.0, kobj=1.0, label_smoothing=0.0, nbs=64, hsv_h=0.015, hsv_s=0.7, hsv_v=0.4, degrees=0.0, translate=0.1, scale=0.5, shear=0.0, perspective=0.0, flipud=0.0, fliplr=0.5, mosaic=1.0, mixup=0.75, copy_paste=0.0, cfg=None, tracker=botsort.yaml, save_dir=runs/detect/magic_ball Overriding model.yaml nc=80 with nc=2 from n params module arguments 0 -1 1 464 ultralytics.nn.modules.conv.Conv [3, 16, 3, 2] 1 -1 1 4672 ultralytics.nn.modules.conv.Conv [16, 32, 3, 2] 2 -1 1 7360 ultralytics.nn.modules.block.C2f [32, 32, 1, True] 3 -1 1 18560 ultralytics.nn.modules.conv.Conv [32, 64, 3, 2] 4 -1 2 49664 ultralytics.nn.modules.block.C2f [64, 64, 2, True] 5 -1 1 73984 ultralytics.nn.modules.conv.Conv [64, 128, 3, 2] 6 -1 2 
197632 ultralytics.nn.modules.block.C2f [128, 128, 2, True] 7 -1 1 295424 ultralytics.nn.modules.conv.Conv [128, 256, 3, 2] 8 -1 1 460288 ultralytics.nn.modules.block.C2f [256, 256, 1, True] 9 -1 1 164608 ultralytics.nn.modules.block.SPPF [256, 256, 5] 10 -1 1 0 torch.nn.modules.upsampling.Upsample [None, 2, 'nearest'] 11 [-1, 6] 1 0 ultralytics.nn.modules.conv.Concat [1] 12 -1 1 148224 ultralytics.nn.modules.block.C2f [384, 128, 1] 13 -1 1 0 torch.nn.modules.upsampling.Upsample [None, 2, 'nearest'] 14 [-1, 4] 1 0 ultralytics.nn.modules.conv.Concat [1] 15 -1 1 37248 ultralytics.nn.modules.block.C2f [192, 64, 1] 16 -1 1 36992 ultralytics.nn.modules.conv.Conv [64, 64, 3, 2] 17 [-1, 12] 1 0 ultralytics.nn.modules.conv.Concat [1] 18 -1 1 123648 ultralytics.nn.modules.block.C2f [192, 128, 1] 19 -1 1 147712 ultralytics.nn.modules.conv.Conv [128, 128, 3, 2] 20 [-1, 9] 1 0 ultralytics.nn.modules.conv.Concat [1] 21 -1 1 493056 ultralytics.nn.modules.block.C2f [384, 256, 1] 22 [15, 18, 21] 1 751702 ultralytics.nn.modules.head.Detect [2, [64, 128, 256]] Model summary: 225 layers, 3011238 parameters, 3011222 gradients, 8.2 GFLOPs Transferred 319/355 items from pretrained weights TensorBoard: Start with 'tensorboard --logdir runs/detect/magic_ball', view at http://localhost:6006/ AMP: running Automatic Mixed Precision (AMP) checks with YOLOv8n... AMP: checks passed ✅ train: Scanning train... 9800 images, 0 backgrounds, 0 corrupt: 100%|██████████| 9800/9800 [00:09<00:00, 997.25it/s] train: New cache created: train.cache albumentations: Blur(p=0.01, blur_limit=(3, 7)), MedianBlur(p=0.01, blur_limit=(3, 7)), ToGray(p=0.01), CLAHE(p=0.01, clip_limit=(1, 4.0), tile_grid_size=(8, 8)) /home/kaor/.conda/envs/mmdet/lib/python3.7/site-packages/torch/utils/data/dataloader.py:477: UserWarning: This DataLoader will create 8 worker processes in total. Our suggested max number of worker in current system is 6, which is smaller than what this DataLoader is going to create. 
Please be aware that excessive worker creation might get DataLoader running slow or even freeze, lower the worker number to avoid potential slowness/freeze if necessary. cpuset_checked)) val: Scanning train... 200 images, 0 backgrounds, 0 corrupt: 100%|██████████| 200/200 [00:00<00:00, 1213.90it/s] val: New cache created: train.cache /home/kaor/.conda/envs/mmdet/lib/python3.7/site-packages/torch/utils/data/dataloader.py:477: UserWarning: This DataLoader will create 16 worker processes in total. Our suggested max number of worker in current system is 6, which is smaller than what this DataLoader is going to create. Please be aware that excessive worker creation might get DataLoader running slow or even freeze, lower the worker number to avoid potential slowness/freeze if necessary. cpuset_checked)) Plotting labels to runs/detect/magic_ball/labels.jpg... optimizer: AdamW(lr=0.001667, momentum=0.9) with parameter groups 57 weight(decay=0.0), 64 weight(decay=0.0005), 63 bias(decay=0.0) Image sizes 640 train, 640 val Using 8 dataloader workers Logging results to runs/detect/magic_ball Starting training for 6 epochs... 
Epoch GPU_mem box_loss cls_loss dfl_loss Instances Size 1/6 5.93G 0.4592 0.8717 0.8664 77 640: 100%|██████████| 307/307 [00:55<00:00, 5.56it/s] Class Images Instances Box(P R mAP50 mAP50-95): 100%|██████████| 4/4 [00:01<00:00, 3.12it/s] all 200 575 0.999 0.965 0.97 0.954 Epoch GPU_mem box_loss cls_loss dfl_loss Instances Size 2/6 5.96G 0.3396 0.3815 0.8254 83 640: 100%|██████████| 307/307 [00:53<00:00, 5.72it/s] Class Images Instances Box(P R mAP50 mAP50-95): 100%|██████████| 4/4 [00:01<00:00, 3.06it/s] all 200 575 0.998 0.969 0.972 0.965 Epoch GPU_mem box_loss cls_loss dfl_loss Instances Size 3/6 5.96G 0.3104 0.3271 0.8191 88 640: 100%|██████████| 307/307 [00:52<00:00, 5.87it/s] Class Images Instances Box(P R mAP50 mAP50-95): 100%|██████████| 4/4 [00:01<00:00, 3.23it/s] all 200 575 0.997 0.966 0.971 0.965 Epoch GPU_mem box_loss cls_loss dfl_loss Instances Size 4/6 5.94G 0.2918 0.2997 0.815 57 640: 100%|██████████| 307/307 [00:52<00:00, 5.86it/s] Class Images Instances Box(P R mAP50 mAP50-95): 100%|██████████| 4/4 [00:01<00:00, 3.12it/s] all 200 575 1 0.97 0.971 0.968 Epoch GPU_mem box_loss cls_loss dfl_loss Instances Size 5/6 5.93G 0.2813 0.2852 0.8131 77 640: 100%|██████████| 307/307 [00:52<00:00, 5.83it/s] Class Images Instances Box(P R mAP50 mAP50-95): 100%|██████████| 4/4 [00:01<00:00, 2.95it/s] all 200 575 1 0.97 0.972 0.969 Epoch GPU_mem box_loss cls_loss dfl_loss Instances Size 6/6 5.96G 0.2658 0.2674 0.8104 52 640: 100%|██████████| 307/307 [00:52<00:00, 5.81it/s] Class Images Instances Box(P R mAP50 mAP50-95): 100%|██████████| 4/4 [00:02<00:00, 1.39it/s] all 200 575 1 0.97 0.974 0.971 6 epochs completed in 0.094 hours. Optimizer stripped from runs/detect/magic_ball/weights/last.pt, 6.2MB Optimizer stripped from runs/detect/magic_ball/weights/best.pt, 6.2MB Validating runs/detect/magic_ball/weights/best.pt... 
Ultralytics YOLOv8.0.145 🚀 Python-3.7.10 torch-1.8.1+cu111 CUDA:0 (NVIDIA RTX 6000 Ada Generation, 48640MiB) Model summary (fused): 168 layers, 3006038 parameters, 0 gradients, 8.1 GFLOPs Class Images Instances Box(P R mAP50 mAP50-95): 100%|██████████| 4/4 [00:02<00:00, 1.55it/s] all 200 575 1 0.97 0.974 0.971 magic_ball 200 293 1 0.997 0.995 0.993 ball 200 282 1 0.943 0.952 0.95 Speed: 1.1ms preprocess, 0.5ms inference, 0.0ms loss, 0.5ms postprocess per image Results saved to runs/detect/magic_ball

# Inference over the sample images: run the detector at two input sizes, fuse
# the detections with NMS and draw a circle around every class-0 (magic ball)
# detection. Each entry of `all_images` is [path, annotated image, seconds].
start_time = time.time()
all_images = []
# `path` was never defined at module level. Use the directory of the reference
# image, with a trailing separator so that f"{path}*" matches the files in it.
path = os.path.join(os.path.dirname(config.magic_ball_for_example), "")
paths = glob.glob(f"{path}*")
for p in paths:
    s = time.time()
    img = cv2.imread(p)
    if img is None:
        # Not a readable image; nothing to predict on or to plot later.
        continue
    h, w, _ = img.shape
    # Divisor that brings pixel coordinates strictly below 1 for the NMS call.
    n = max(h, w) + 1

    detections = []
    for imgsz in (320, 640):
        # The image is passed exactly as loaded by cv2.imread: Ultralytics
        # expects numpy inputs in OpenCV (BGR) channel order, so reversing the
        # channels here would swap red and blue relative to training.
        res = model(img,
                    save=False,
                    augment=True,
                    imgsz=imgsz,
                    conf=0.3,
                    classes=[0, 1],
                    exist_ok=True)
        # Columns are indexed below as 0-3 box corners, 4 score, 5 class.
        det = res[0].boxes.data.cpu().numpy()
        if len(det) != 0:
            detections.append(det)

    if not detections:
        all_images.append([p, img, time.time() - s])
        continue

    # Only the box columns are normalised; scores and class ids stay intact.
    boxes_list = [(det[:, :4] / n).tolist() for det in detections]
    scores_list = [det[:, 4].tolist() for det in detections]
    labels_list = [det[:, 5].tolist() for det in detections]
    # One weight per prediction list, so the lengths always match.
    boxes, scores, labels = nms(boxes_list, scores_list, labels_list,
                                weights=[1.0] * len(detections), iou_thr=0.1)
    boxes *= n

    wx = boxes[:, 2] - boxes[:, 0]
    hy = boxes[:, 3] - boxes[:, 1]
    xcenter = boxes[:, 0] + wx / 2
    ycenter = boxes[:, 1] + hy / 2
    for x, y, ww, hh, l in zip(xcenter, ycenter, wx, hy, labels):
        # Ignore detections narrower or shorter than 20 px.
        if ww < 20 or hh < 20:
            continue
        if l == 0:
            diameter = max(ww, hh)
            cv2.circle(img, (int(x), int(y)), int(diameter / 2.0), (200, 50, 100), 5)
    all_images.append([p, img, time.time() - s])

all_time_p = time.time() - start_time
clear_output()
print(f"cell time: {all_time_p}")
cell time: 0.9948115348815918
# Show every annotated image with its prediction time and save a copy under
# the "results" counterpart of its input path.
plt.figure(figsize=[20, 20])
# 4x4 as before for up to 16 images; grows instead of failing for more.
grid = max(4, int(np.ceil(np.sqrt(len(all_images)))))
for i, (p, im, t) in enumerate(all_images):
    plt.subplot(grid, grid, i + 1)
    plt.imshow(im[:, :, ::-1])
    plt.title(f"predict time - {round(t*1000,3)} ms")
    out_path = p.replace("Balls", "results")
    # cv2.imwrite only returns False when the target directory is missing, so
    # make sure it exists.
    os.makedirs(os.path.dirname(out_path) or ".", exist_ok=True)
    cv2.imwrite(out_path, im)
